I] Arrange Data

# Flag rows of iris that exactly repeat an earlier row (TRUE = duplicate).
duplicated(iris)
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[141] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Check how the Species column is stored.
class(iris$Species)
[1] "factor"
# Show the first five Species values; printing a factor also lists its levels.
iris$Species[1:5]
[1] setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
# Cleaning Data
# dplyr provides mutate(), group_by() and summarise(); attach it here so this
# chunk runs on its own instead of depending on a library() call made later.
library(dplyr)
library(DT)

# Mutate
# Append the petal width-to-length ratio as an extra column.
mutate(iris, aspect_ratio = Petal.Width / Petal.Length)

# Aggregate
# Mean sepal length for each species.
aggregate(Sepal.Length ~ Species, data = iris, FUN = mean)
# Number of observations for each species.
aggregate(Sepal.Length ~ Species, data = iris, FUN = length)
# Number of observations sharing each distinct sepal length.
aggregate(Species ~ Sepal.Length, data = iris, FUN = length)

# Group by & Summarize
# One row per species holding the mean and standard deviation of each of the
# four measurements.
new_iris <- iris %>%
  group_by(Species) %>%
  summarise(
    Avg_Sepal_Length = mean(Sepal.Length),
    SD_Sepal_Length = sd(Sepal.Length),
    Avg_Sepal_Width = mean(Sepal.Width),
    SD_Sepal_Width = sd(Sepal.Width),
    Avg_Petal_Length = mean(Petal.Length),
    SD_Petal_Length = sd(Petal.Length),
    Avg_Petal_Width = mean(Petal.Width),
    SD_Petal_Width = sd(Petal.Width)
  )

# Round every statistic column (all columns except the Species key) so the
# petal statistics are displayed with the same precision as the sepal ones.
datatable(new_iris, caption = "Mean & Standard Deviation") %>%
  formatRound(setdiff(names(new_iris), "Species"), digits = 2)

II] Arrange Table

library(ggplot2)
library(plotly)
library(dplyr)
# Preview the first ten observations.
head(iris, n = 10L)
# Tally the observations per species.
iris %>% count(Species)
# Scatter Plot
# Petal length against sepal length. Species is encoded by both colour and
# point shape, and a linear-model smoother is drawn on top of the points.
p1 <- ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Length,
    y = Petal.Length,
    color = Species,
    shape = Species
  )
) +
  ggtitle("Petal Length vs Sepal Length") +
  geom_point(size = 3) +
  geom_smooth(method = "lm")
# Convert the static ggplot into an interactive plotly widget.
ggplotly(p1)
`geom_smooth()` using formula 'y ~ x'
# Generalized Pair Plot
library(GGally)
# Plot matrix over columns 1 to 4 of iris, coloured by species.
p2 <- ggpairs(
  data = iris,
  mapping = aes(color = Species),
  columns = 1:4
) +
  ggtitle("Anderson's Iris Dataset --- 3 Species")
p2

 plot: [1,1] [=>------------------------------------]  6% est: 0s 
 plot: [1,2] [====>---------------------------------] 12% est: 1s 
 plot: [1,3] [======>-------------------------------] 19% est: 1s 
 plot: [1,4] [=========>----------------------------] 25% est: 1s 
 plot: [2,1] [===========>--------------------------] 31% est: 1s 
 plot: [2,2] [=============>------------------------] 38% est: 1s 
 plot: [2,3] [================>---------------------] 44% est: 1s 
 plot: [2,4] [==================>-------------------] 50% est: 1s 
 plot: [3,1] [====================>-----------------] 56% est: 1s 
 plot: [3,2] [=======================>--------------] 62% est: 1s 
 plot: [3,3] [=========================>------------] 69% est: 1s 
 plot: [3,4] [===========================>----------] 75% est: 1s 
 plot: [4,1] [==============================>-------] 81% est: 0s 
 plot: [4,2] [================================>-----] 88% est: 0s 
 plot: [4,3] [===================================>--] 94% est: 0s 
 plot: [4,4] [======================================]100% est: 0s 
                                                                  

# Base-graphics pair plot of columns 1 to 4 of iris (upper triangle only),
# with the point fill colour chosen by species.
# pairs() draws directly on the graphics device and returns NULL, so its
# result is neither stored nor printed.
pairs(
  iris[1:4],
  lower.panel = NULL,
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)
# Parallel Coordinate Plot
# Columns 1 to 4 of iris become the parallel axes; lines are grouped by the
# Species column.
p4 <- ggparcoord(
  data = iris,
  columns = 1:4,
  groupColumn = "Species"
)
# Convert the static plot into an interactive plotly widget.
ggplotly(p4)

III] Other Plots

# Histogram
# Sepal length in 0.2-wide bins, filled by species, with a dashed grey line
# at the overall mean sepal length.
# The mean is supplied as a constant xintercept rather than through aes():
# inside aes() the value is recycled to every row of the data, so one
# identical line would be drawn per observation.
p5 <- ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(aes(fill = Species), binwidth = 0.2, color = "black") +
  xlab("Sepal Length") +
  ylab("Frequency") +
  ggtitle("Histogram of Sepal Length") +
  geom_vline(
    xintercept = mean(iris$Sepal.Length),
    linetype = "dashed",
    color = "grey"
  )
# Convert the static plot into an interactive plotly widget.
ggplotly(p5)
# Density Plot
# Semi-transparent sepal-width density curve per species, with a dashed grey
# line at the overall mean sepal width.
# The mean is supplied as a constant xintercept rather than through aes():
# inside aes() the value is recycled to every row of the data, so one
# identical line would be drawn per observation.
p6 <- ggplot(iris, aes(x = Sepal.Width, colour = Species, fill = Species)) +
  geom_density(alpha = 0.5) +
  geom_vline(
    xintercept = mean(iris$Sepal.Width),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Sepal Width") +
  ylab("Density")
# Convert the static plot into an interactive plotly widget.
ggplotly(p6)
# Box Plot
# Petal length distribution per species; y-axis breaks every 0.5 units.
p7 <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Length, fill = Species)
) +
  geom_boxplot() +
  scale_y_continuous(name = "Petal Length", breaks = seq(0, 10, by = 0.5)) +
  labs(title = "Iris Petal Length Box Plot", x = "Species")
# Convert the static plot into an interactive plotly widget.
ggplotly(p7)
# Violin Plot
# Petal width distribution per species; the violin outline and fill are both
# coloured by species, and y-axis breaks fall every 0.5 units.
p8 <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Width, fill = Species)
) +
  geom_violin(aes(color = Species), alpha = 0.75) +
  scale_y_continuous(name = "Petal Width", breaks = seq(0, 10, by = 0.5)) +
  labs(title = "Iris Petal Width Violin Plot", x = "Species")
# Convert the static plot into an interactive plotly widget.
ggplotly(p8)
LS0tDQp0aXRsZTogImlyaXMgRGF0YXNldCB3aXRoIGludGVyYWN0aXZpdHkiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KSV0gQXJyYW5nZSBEYXRhDQpgYGB7cn0NCmR1cGxpY2F0ZWQoaXJpcykNCmNsYXNzKGlyaXMkU3BlY2llcykNCmlyaXMkU3BlY2llc1sxOjVdDQpgYGANCmBgYHtyfQ0KIyBDbGVhbmluZyBEYXRhDQojIE11dGF0ZQ0KbXV0YXRlKGlyaXMsIGFzcGVjdF9yYXRpb24gPSBQZXRhbC5XaWR0aC9QZXRhbC5MZW5ndGgpDQoNCiMgQWdncmVnYXRlDQphZ2dyZWdhdGUoU2VwYWwuTGVuZ3RoIH4gU3BlY2llcywgZGF0YSA9IGlyaXMsIEZVTiA9ICdtZWFuJykNCmFnZ3JlZ2F0ZShTZXBhbC5MZW5ndGggfiBTcGVjaWVzLCBkYXRhID0gaXJpcywgRlVOID0gJ2xlbmd0aCcpDQphZ2dyZWdhdGUoU3BlY2llcyB+IFNlcGFsLkxlbmd0aCwgZGF0YSA9IGlyaXMsIEZVTiA9ICdsZW5ndGgnKQ0KDQojIEdyb3VwIGJ5ICYgU3VtbWFyaXplDQpsaWJyYXJ5KERUKQ0KbmV3X2lyaXMgPC0gaXJpcyAlPiUgZ3JvdXBfYnkoU3BlY2llcykgJT4lIHN1bW1hcmlzZShBdmdfU2VwYWxfTGVuZ3RoID0gbWVhbihTZXBhbC5MZW5ndGgpLCBTRF9TZXBhbF9MZW5ndGggPSBzZChTZXBhbC5MZW5ndGgpLCBBdmdfU2VwYWxfV2lkdGggPSBtZWFuKFNlcGFsLldpZHRoKSwgU0RfU2VwYWxfV2lkdGggPSBzZChTZXBhbC5XaWR0aCksIEF2Z19QZXRhbF9MZW5ndGggPSBtZWFuKFBldGFsLkxlbmd0aCksIFNEX1BldGFsX0xlbmd0aCA9IHNkKFBldGFsLkxlbmd0aCksIEF2Z19QZXRhbF9XaWR0aCA9IG1lYW4oUGV0YWwuV2lkdGgpLCBTRF9QZXRhbF9XaWR0aCA9IHNkKFBldGFsLldpZHRoKSkNCmRhdGF0YWJsZShuZXdfaXJpcywgY2FwdGlvbiA9ICJNZWFuICYgU3RhbmRhcmQgRGV2aWF0aW9uIikgJT4lIGZvcm1hdFJvdW5kKDI6NSwgZGlnaXRzID0gMikNCmBgYA0KSUldIEFycmFuZ2UgVGFibGUNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShwbG90bHkpDQpsaWJyYXJ5KGRwbHlyKQ0KaGVhZChpcmlzLCAxMCkNCmNvdW50KGlyaXMsIFNwZWNpZXMpDQpgYGANCmBgYHtyfQ0KIyBTY2F0dGVyIFBsb3QNCnAxIDwtIGdncGxvdChpcmlzLCBhZXMoeCA9IFNlcGFsLkxlbmd0aCwgeSA9IFBldGFsLkxlbmd0aCwgY29sb3IgPSBTcGVjaWVzLCBzaGFwZSA9IFNwZWNpZXMpKSArIGdlb21fcG9pbnQoc2l6ZSA9IDMpICsgZ2d0aXRsZSgnUGV0YWwgTGVuZ3RoIHZzIFNlcGFsIExlbmd0aCcpICsgZ2VvbV9zbW9vdGgobWV0aG9kID0gJ2xtJykNCmdncGxvdGx5KHAxKQ0KYGBgDQpgYGB7cn0NCiMgR2VuZXJhbGl6ZWQgUGFpciBQbG90DQpsaWJyYXJ5KEdHYWxseSkNCnAyIDwtIGdncGFpcnMoaXJpcywgY29sdW1ucyA9IDE6NCwgYWVzKGNvbG9yID0gU3BlY2llcykpICsgZ2d0aXRsZSgiQW5kZXJzb24ncyBJcmlzIERhdGFzZXQgLS0tIDMgU3BlY2llcyIpDQpwMg0KcDMgPC0gcGFpcnMoaXJpc1sxOjRdLCBsb3dlci5wYW5lbCA9IE5VTEws
IG1haW4gPSAiQW5kZXJzb24ncyBJcmlzIERhdGEgLS0gMyBzcGVjaWVzIiwgcGNoID0gMjEsIGJnID0gYygicmVkIiwgImdyZWVuMyIsICJibHVlIilbdW5jbGFzcyhpcmlzJFNwZWNpZXMpXSkNCnAzDQpgYGANCmBgYHtyfQ0KIyBQYXJhbGxlbCBDb29yZGluYXRlIFBsb3QNCnA0IDwtIGdncGFyY29vcmQoZGF0YSA9IGlyaXMsIGNvbHVtbnMgPSAxOjQsIGdyb3VwQ29sdW1uID0gIlNwZWNpZXMiKQ0KZ2dwbG90bHkocDQpDQpgYGANCklJSV0gT3RoZXIgUGxvdHMgDQpgYGB7cn0NCiMgSGlzdG9ncmFtDQpwNSA8LSBnZ3Bsb3QoZGF0YSA9IGlyaXMsIGFlcyh4ID0gU2VwYWwuTGVuZ3RoKSkgKyBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDAuMiwgY29sb3I9ImJsYWNrIiwgYWVzKGZpbGwgPSBTcGVjaWVzKSkgKyB4bGFiKCJTZXBhbCBMZW5ndGgiKSArIHlsYWIoIkZyZXF1ZW5jeSIpICsgZ2d0aXRsZSgiSGlzdG9ncmFtIG9mIFNlcGFsIExlbmd0aCIpICsgZ2VvbV92bGluZShkYXRhID0gaXJpcywgYWVzKHhpbnRlcmNlcHQgPSBtZWFuKFNlcGFsLkxlbmd0aCkpLCBsaW5ldHlwZSA9ICJkYXNoZWQiLCBjb2xvcj0iZ3JleSIpDQpnZ3Bsb3RseShwNSkNCmBgYA0KYGBge3J9DQojIERlbnNpdHkgUGxvdA0KcDYgPC0gZ2dwbG90KGlyaXMsIGFlcyh4ID0gU2VwYWwuV2lkdGgsIGNvbG91ciA9IFNwZWNpZXMsIGZpbGwgPSBTcGVjaWVzKSkgKyBnZW9tX2RlbnNpdHkoYWxwaGEgPSAuNSkgKyBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0ID0gbWVhbihTZXBhbC5XaWR0aCkpLCBsaW5ldHlwZSA9ICJkYXNoZWQiLCBjb2xvciA9ICJncmV5Iiwgc2l6ZSA9IDEpICsgeGxhYigiU2VwYWwgV2lkdGgiKSArIHlsYWIoIkRlbnNpdHkiKQ0KZ2dwbG90bHkocDYpDQpgYGANCmBgYHtyfQ0KIyBCb3ggUGxvdA0KcDcgPC0gZ2dwbG90KGlyaXMsIGFlcyhTcGVjaWVzLCBQZXRhbC5MZW5ndGgsIGZpbGwgPSBTcGVjaWVzKSkgKyBnZW9tX2JveHBsb3QoKSArIHNjYWxlX3lfY29udGludW91cygiUGV0YWwgTGVuZ3RoIiwgYnJlYWtzID0gc2VxKDAsIDEwLCBieSA9IC41KSkgKyBsYWJzKHRpdGxlID0gIklyaXMgUGV0YWwgTGVuZ3RoIEJveCBQbG90IiwgeCA9ICJTcGVjaWVzIikNCmdncGxvdGx5KHA3KQ0KYGBgDQpgYGB7cn0NCiMgVmlvbGluIFBsb3QNCnA4IDwtIGdncGxvdChpcmlzLCBhZXMoU3BlY2llcywgUGV0YWwuV2lkdGgsIGZpbGwgPSBTcGVjaWVzKSkgKyBzY2FsZV95X2NvbnRpbnVvdXMoIlBldGFsIFdpZHRoIiwgYnJlYWtzID0gc2VxKDAsIDEwLCBieSA9IC41KSkgKyBsYWJzKHRpdGxlID0gIklyaXMgUGV0YWwgV2lkdGggVmlvbGluIFBsb3QiLCB4ID0gIlNwZWNpZXMiKSArIGdlb21fdmlvbGluKGFlcyhjb2xvciA9IFNwZWNpZXMpLCBhbHBoYSA9IC43NSkNCmdncGxvdGx5KHA4KQ0KYGBg